
* Session setup: start from an empty session, switch off output paging and
* close any log that is still open so that a fresh log can be opened below.
clear all
set more off
cap log close

* Open the log of this run, overwriting the log of an earlier run.
log using "$log/06_01_create_u_rate_SIAB.log", replace

* Reset timer 1 and start it.
timer clear 1
timer on 1
* Disabled: switching the label language to English.
*label lang en 

* Load the SIAB spell data. A forward slash is used as path separator because
* Stata accepts it on every platform, whereas a backslash only works on Windows.
use "$orig/SIAB_7519_v1.dta", clear

set seed 666

* Keep rows with quelle equal to 1 (distribution is tabulated first for the log).
tabulate quelle
keep if quelle == 1

* Keep rows whose original spell start coincides with the episode start and
* continue with the original spell dates only.
keep if begorig == begepi
drop begepi endepi
rename erwstat beh_pers_gr
generate spell_length = endorig - begorig + 1
generate year = year(begorig)

* Restrict to person-group codes 102 and 141, the codes this file treats as
* training spells.
tabulate beh_pers_gr
keep if inlist(beh_pers_gr, 102, 141)
tabulate beh_pers_gr, missing

* Drop persons whose only remaining spell is shorter than 183 days.
* n is the running spell number within person, nn the number of spells.
capture drop n
bysort persnr: generate n = _n
bysort persnr: egen nn = max(n)
drop if nn == 1 & spell_length < 183
drop n nn

* Drop enclosed and parallel spells: a spell is removed when the preceding row
* belongs to the same person and calendar year, starts no later and ends no
* earlier. Dropping a row can expose a new enclosed neighbour, so the step is
* repeated until a pass removes nothing.
sort persnr begorig endorig
local n_dropped = 1
while `n_dropped' > 0 {
	local n_before = _N
	drop if ///
		persnr == persnr[_n-1] & ///
		begorig[_n-1] <= begorig & ///
		endorig[_n-1] >= endorig & ///
		year == year[_n-1]
	* The number of removed rows is taken from the change in _N instead of
	* r(N_drop). If drop does not return r(N_drop) (assumption: older Stata
	* releases, to be verified), the local would be missing, and missing
	* compares greater than zero, so the loop would never end.
	local n_dropped = `n_before' - _N
}

*******************************************************************************
* Combine spells within year if same betnr and 2-digit-job
* (preparation: build the 2-digit occupation code used by the merging steps)

tabulate beruf, missing

* Collapse all missing codes into the system missing value, keep the detailed
* code under a new name and rebuild beruf as the code divided by ten.
replace beruf = . if missing(beruf)
clonevar beh_beruf_num = beruf
drop beruf
generate beruf = floor(beh_beruf_num/10)

* -1 marks an unusable occupation: negative, missing, or 981 and 982.
replace beruf = -1 if inlist(beh_beruf_num, ., -5, 981, 982) | beh_beruf_num < 0

* NOTE(review): temp holds the occupation of the first row of every
* person-by-start-date group, and first_train_beruf is the minimum of these
* values per person. That is the smallest code across spell starts, not
* necessarily the code of the first spell. Confirm this is intended.
bysort persnr begorig: generate n = _n
generate temp = beruf if n == 1
by persnr: egen first_train_beruf = min(temp)
drop n temp

* Pass 1: merge consecutive spells of a person within the same calendar year
* when establishment (betnr) and 2-digit occupation agree.
sort persnr begorig betnr beruf

* tag marks a row that continues the preceding row: same person, same betnr,
* same occupation (an unknown occupation, coded -1, matches anything), same
* year, and a start at most 91 days after the end of the preceding row.
gen tag = 1 if ///
	persnr == persnr[_n-1] & ///
	betnr == betnr[_n-1] & ///
	(beruf == beruf[_n-1] | beruf == -1 | beruf[_n-1] == -1) & ///
	endorig[_n-1] + 91 >= begorig & ///
	year == year[_n-1]

* The continuing row inherits the start date of its predecessor. replace works
* through the rows in order, so chains of tagged rows inherit the first start.
replace begorig = begorig[_n-1] if tag == 1
replace spell_length = endorig - begorig + 1

* Fill an unknown occupation or education (-7) from the predecessor. The
* education guard now tests the education of the predecessor, where it used
* to test beruf, a copy of the condition one line above.
replace beruf = beruf[_n-1] if beruf == -1 & beruf[_n-1] != -1 & tag == 1
replace ausbildung_imp = ausbildung_imp[_n-1] if ausbildung_imp == -7 & ausbildung_imp[_n-1] != -7 & tag == 1

* The predecessor is now contained in the merged row and can go.
drop if tag[_n+1] == 1
drop tag

*******************************************************************************
* Combine spells within year if same 2-digit-job

* Pass 2: as the pass before, but the establishment no longer has to agree.
sort persnr begorig beruf

* tag marks a row that continues the preceding row: same person, same
* occupation (an unknown occupation, coded -1, matches anything), same year,
* and a start at most 91 days after the end of the preceding row.
gen tag = 1 if ///
	persnr == persnr[_n-1] & ///
	(beruf == beruf[_n-1] | beruf == -1 | beruf[_n-1] == -1) & ///
	endorig[_n-1] + 91 >= begorig & ///
	year == year[_n-1]

* The continuing row inherits the start date of its predecessor. replace works
* through the rows in order, so chains of tagged rows inherit the first start.
replace begorig = begorig[_n-1] if tag == 1
replace spell_length = endorig - begorig + 1

* Fill an unknown occupation or education (-7) from the predecessor. The
* education guard now tests the education of the predecessor, where it used
* to test beruf, a copy of the condition one line above.
replace beruf = beruf[_n-1] if beruf == -1 & beruf[_n-1] != -1 & tag == 1
replace ausbildung_imp = ausbildung_imp[_n-1] if ausbildung_imp == -7 & ausbildung_imp[_n-1] != -7 & tag == 1

* The predecessor is now contained in the merged row and can go.
drop if tag[_n+1] == 1
drop tag

*******************************************************************************
* Combine spells over year if same betnr and 2-digit-job

* Pass 3: establishment and occupation must agree, but the two spells may lie
* in different calendar years.
sort persnr begorig betnr beruf

* tag marks a row that continues the preceding row: same person, same betnr,
* same occupation (an unknown occupation, coded -1, matches anything), and a
* start at most 91 days after the end of the preceding row.
gen tag = 1 if ///
	persnr == persnr[_n-1] & ///
	betnr == betnr[_n-1] & ///
	(beruf == beruf[_n-1] | beruf == -1 | beruf[_n-1] == -1) & ///
	endorig[_n-1] + 91 >= begorig

* The continuing row inherits the start date of its predecessor. replace works
* through the rows in order, so chains of tagged rows inherit the first start.
replace begorig = begorig[_n-1] if tag == 1
replace spell_length = endorig - begorig + 1

* Fill an unknown occupation or education (-7) from the predecessor. The
* education guard now tests the education of the predecessor, where it used
* to test beruf, a copy of the condition one line above.
replace beruf = beruf[_n-1] if beruf == -1 & beruf[_n-1] != -1 & tag == 1
replace ausbildung_imp = ausbildung_imp[_n-1] if ausbildung_imp == -7 & ausbildung_imp[_n-1] != -7 & tag == 1

* The predecessor is now contained in the merged row and can go.
drop if tag[_n+1] == 1
drop tag

*******************************************************************************
* Combine spells over year if same 2-digit-job

* Pass 4: only the occupation must agree; establishment and calendar year may
* differ.
sort persnr begorig beruf

* tag marks a row that continues the preceding row: same person, same
* occupation (an unknown occupation, coded -1, matches anything), and a start
* at most 91 days after the end of the preceding row.
gen tag = 1 if ///
	persnr == persnr[_n-1] & ///
	(beruf == beruf[_n-1] | beruf == -1 | beruf[_n-1] == -1) & ///
	endorig[_n-1] + 91 >= begorig

* The continuing row inherits the start date of its predecessor. replace works
* through the rows in order, so chains of tagged rows inherit the first start.
replace begorig = begorig[_n-1] if tag == 1
replace spell_length = endorig - begorig + 1

* Fill an unknown occupation or education (-7) from the predecessor. The
* education guard now tests the education of the predecessor, where it used
* to test beruf, a copy of the condition one line above.
replace beruf = beruf[_n-1] if beruf == -1 & beruf[_n-1] != -1 & tag == 1
replace ausbildung_imp = ausbildung_imp[_n-1] if ausbildung_imp == -7 & ausbildung_imp[_n-1] != -7 & tag == 1

* The predecessor is now contained in the merged row and can go.
drop if tag[_n+1] == 1
drop tag

* Distribution of the merged spell lengths, for the log.
summarize spell_length, detail

* Per person: shortest and longest spell and the number of spells.
bysort persnr: egen min_training = min(spell_length)
bysort persnr: egen max_training = max(spell_length)
bysort persnr: egen no_trainings  = count(persnr)

* Tabulate the number of spells once per person.
capture drop n
by persnr: generate n = _n
tabulate no_trainings if n == 1

* Persons with a single spell: the spell must last 183 to 1465 days.
drop if no_trainings == 1 & !inrange(spell_length,183,1465)

* Diagnostics only: flag spells inside the admissible range, first for persons
* with one spell, then for persons with several, and report per person whether
* at least one spell qualifies.
generate keep = (no_trainings == 1 & inrange(spell_length,183,1465))
tabulate keep
replace keep = 1 if no_trainings > 1 & inrange(spell_length,183,1465)
tabulate keep

bysort persnr: egen min_keep = max(keep)
capture drop n
bysort persnr: generate n = _n
tabulate min_keep if n == 1
drop min_keep n

* Persons with several spells: first remove persons whose spells are all too
* short or all too long, then every remaining spell outside the range.
drop if no_trainings > 1 & max_training < 183
drop if no_trainings > 1 & min_training > 1465

drop if no_trainings > 1 & spell_length < 183
drop if no_trainings > 1 & spell_length > 1465

drop min_training max_training no_trainings keep 

* Recount the spells per person after the length filters and tabulate the
* count once per person.
bysort persnr: egen no_trainings  = count(persnr)
capture drop n
bysort persnr: generate n = _n
tabulate no_trainings if n == 1

* For persons with several spells keep only the spell(s) with the earliest
* end date.
bysort persnr: egen min_endorig = min(endorig)
drop if no_trainings > 1 & endorig != min_endorig

drop min_endorig

* The entry year is the calendar year in which the kept spell ends. Reduce the
* data to one row per person and entry year and store it for the merge below.
generate year_entry     = year(endorig)
keep persnr year_entry
duplicates drop
tabulate year_entry, missing

compress
save "$temp/train_end_SIAB", replace
clear


********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

* Reload the full SIAB spell data. A forward slash is used as path separator
* because Stata accepts it on every platform, a backslash only on Windows.
use "$orig/SIAB_7519_v1.dta", clear

* Recode the imputed education variable: -7 becomes missing, category 6 is
* merged into category 5, and every kind of missing becomes system missing.
tab ausbildung_imp, mis
replace ausbildung_imp = . if ausbildung_imp == -7
replace ausbildung_imp = 5 if ausbildung_imp ==  6
replace ausbildung_imp = . if missing(ausbildung_imp)

tab ausbildung_imp, mis
tab ausbildung if missing(ausbildung_imp), mis
tab schule if missing(ausbildung_imp), mis

* Where the imputed variable is missing, derive the category from the raw
* training (ausbildung) and schooling (schule) codes.
replace ausbildung_imp = 1 if missing(ausbildung_imp) &  inlist(ausbildung,1,7,8)   & inlist(schule,1,4,6)
replace ausbildung_imp = 2 if missing(ausbildung_imp) &  inlist(ausbildung,2,3,4,9,25) & inlist(schule,1,4,6) 
replace ausbildung_imp = 3 if missing(ausbildung_imp) &  inlist(ausbildung,1,7,8)   & inlist(schule,7,8,9) 
replace ausbildung_imp = 4 if missing(ausbildung_imp) &  inlist(ausbildung,2,3,4,9,25) & inlist(schule,7,8,9) 
replace ausbildung_imp = 5 if missing(ausbildung_imp) & (inrange(ausbildung,10,24)  | inrange(ausbildung,5,6))
count if missing(ausbildung_imp) 
tab ausbildung_imp, mis

* Carry the last known category forward within person.
sort persnr spell
replace ausbildung_imp = ausbildung_imp[_n-1] if missing(ausbildung_imp) & persnr == persnr[_n-1]

* Education may not fall from one spell to the next within person. A missing
* predecessor must be excluded explicitly: Stata treats missing as larger than
* any number, so without the guard a person whose first spells have missing
* education would have every later value overwritten with missing.
replace ausbildung_imp = ausbildung_imp[_n-1] if ausbildung_imp < ausbildung_imp[_n-1] & !missing(ausbildung_imp[_n-1]) & persnr == persnr[_n-1]

count if missing(ausbildung_imp) 
tab ausbildung_imp, mis


* trained equals 1 for every person with at least one spell in education
* category 2 or 4.
generate byte has_cat = inlist(ausbildung_imp, 2, 4)
bysort persnr: egen byte trained = max(has_cat)
drop has_cat
tabulate trained, missing

* Keep these persons, and of their spells only those in category 2 or 4.
keep if trained == 1
tabulate ausbildung_imp, missing

keep if inlist(ausbildung_imp, 2, 4)
drop ausbildung_imp

* Attach the entry year per person. Persons that only appear in the entry-year
* file are not kept, and no merge indicator is left behind.
merge m:1 persnr using "$temp/train_end_SIAB", keep(master match) nogenerate

* Calendar year of the episode end (used as merge key below) and the episode
* length in days.
generate year = year(endepi)
generate spell_length = endepi - begepi + 1

* Age at the start of the episode; keep ages 15 to 66. A missing age is
* dropped as well, because missing compares greater than 66.
generate byte age = year(begepi) - gebjahr
drop gebjahr
drop if age < 15
drop if age > 66

*Youth variable
generate byte youth = (age < 30)
drop age

*Create two digit occupation code (KlDB system)
* Negative codes are set to missing before the code is divided by ten.
replace beruf = . if beruf < 0
generate occ_2dig = floor(beruf/10)
tabulate occ_2dig, missing
drop beruf

*Merge industry and state
* The establishment file is keyed by betnr and jahr, so year is renamed for
* the merge. A forward slash is used as path separator because Stata accepts
* it on every platform, a backslash only on Windows.
rename year jahr
merge m:1 betnr jahr using "$orig/SIAB_7519_v1_bhp_basis_v2.dta", keepusing(w08_3_gen ao_bula)
drop if _merge == 2
drop _merge
drop jahr
compress

* Two-digit industry: the industry code divided by ten, rounded down.
gen ind_2dig = floor(w08_3_gen/10)
drop w08_3_gen

* Fill gaps within person from the preceding spell. replace works through the
* rows in order, so a value is carried across several missing rows in a row.
sort persnr spell
replace occ_2dig = occ_2dig[_n-1] if missing(occ_2dig) & !missing(occ_2dig[_n-1]) & persnr==persnr[_n-1] // assign previous job occupation
replace ind_2dig = ind_2dig[_n-1] if missing(ind_2dig) & !missing(ind_2dig[_n-1]) & persnr==persnr[_n-1] // assign previous industry
replace ao_bula  = ao_bula[_n-1]  if missing(ao_bula)  & !missing(ao_bula[_n-1])  & persnr==persnr[_n-1] // assign previous state

drop spell

* Spells that still lack occupation, industry or state cannot be assigned to
* a cell and are removed.
drop if missing(occ_2dig)
drop if missing(ind_2dig)
drop if missing(ao_bula)

compress

*Drop if out of labor force
tab erwstat
replace erwstat = . if erwstat == 12 & year(endorig) >= 2005

* Keep the employment-status codes that count as being in the labor force.
* Three groups are only kept for spells starting in 1999 or later. The
* restriction has to be written as year(begorig) >= 1999: the earlier form
* applied year() to the result of the comparison (0 or 1), which returns 1960,
* a nonzero value, so the restriction was always satisfied.
keep if ///
	inlist(erwstat, 101, 140, 143) | /// gr == 1: employees subject to social security
	(inlist(erwstat, 109, 209) & year(begorig) >= 1999) | /// gr == 3: marginal employment
	(inlist(erwstat, 103, 142) & year(begorig) >= 1999) | /// gr == 4: partial retirement
	(inlist(erwstat, 205, 118) & year(begorig) >= 1999) | /// gr == 6: casual workers
	inlist(erwstat, 104, 112, 119, 120, 123, 124, 149, 201, 203, 599) | /// gr == 7, merged with 6: other
	erwstat == 1 | /// gr == 11
	erwstat == 2 | /// gr == 12
	inlist(erwstat, 31, 41, 51) | /// gr == 21
	inlist(erwstat, 102, 141) // keep people in training, gr == 2
tab erwstat, mis

*Unemployment indicator
* Of the codes kept above, those below 100 (groups 11, 12 and 21) are flagged.
gen U_epi = (erwstat < 100)
tab U_epi, mis

drop erwstat

* Keep episodes that start between 1990 and 2019.
gen year = year(begepi)
keep if inrange(year,1990,2019)
drop year

compress

keep persnr begepi endepi U_epi ao_bula occ_2dig ind_2dig trained youth spell_length year_entry 

save "$temp/test_09_SIAB", replace


******************************
* ASSIGN UNEMPLOYMENT STATUS *
******************************

*Start and end
global start = 1990
global end = 2019

* The labor-market state is measured on day 15 of every month.
local day = 15
local months "jan feb mar apr may jun jul aug sep oct nov dec"

forvalues year = $start (1) $end {
	* Episodes that overlap the current year, without episodes that start in
	* the entry year of the person.
	use "$temp/test_09_SIAB", clear
	display "`year'"
	keep if year(begepi)<=`year' & year(endepi)>=`year'
	drop if year_entry == year(begepi) // remove entry cohort

	foreach month of local months {
		display "`month' `year'"
		quietly {
			preserve
				local refdate = date("`day'`month'`year'","DMY")

				* Episodes that cover the reference date.
				keep if begepi<=`refdate' & endepi>=`refdate'

				* A person employed in any episode counts as employed.
				bysort persnr: egen U = min(U_epi)
				keep if U==U_epi

				* Within that state keep the longest episode, one row per person.
				bysort persnr: egen max_spell = max(spell_length)
				keep if spell_length==max_spell
				duplicates drop persnr, force

				* Cell level: mean of U and number of persons.
				gen obs = 1
				tab U
				collapse (mean) U (sum) obs, by(ao_bula occ_2dig ind_2dig trained youth)

				gen day = `refdate'
				format day %td
				gen month = mofd(day)
				format month %tm

				* One temporary file per month, named by month and year; the
				* files are appended after the loop.
				tempfile `month'`year'
				save ``month'`year'', replace
			restore
		}
	}
}

*Append months
* Stack the monthly cell files written by the loop above, in calendar order.
clear
forvalues year = $start (1) $end {
	foreach month of local months {
		append using ``month'`year''
	}
}

* Year of the reference date, and the number of unemployed per cell (the mean
* of U times the cell size).
generate year_entry = year(day)
drop day
generate U_w = U*obs

rename ao_bula region_entry

* Full period first, then the restricted period 1997 to 2019.
save "$temp/U_internal_source_1990-2019.dta", replace

keep if inrange(year_entry,1997,2019)

save "$temp/U_internal_source.dta", replace

*Overall
* Forward slashes are used in the paths so that the files are addressed the
* same way as when they were saved and on every platform.
use "$temp/U_internal_source.dta", clear
local cutoff = 20

* Unemployment rate in percent by region and year.
collapse (sum) U_w obs, by(year_entry region_entry)
gen U_internal = 100*U_w/obs

* Moving average over the current and the following year within region
* (window: no lag, current term, one lead).
xtset region_entry year_entry
tssmooth ma U_internal01 = U_internal, window(0 1 1)
keep year_entry region_entry obs U_internal*

* Region-years with fewer than the cutoff number of observations are removed
* after smoothing.
drop if obs<`cutoff'

xtset region_entry year_entry

* Yearly change of the rate; it has to exist for every region from 1998 on.
gen d_U_internal = D.U_internal
assert !missing(d_U_internal) if year_entry >= 1998

save "$temp/U_internal.dta", replace

clear

* Stop timer 1, started at the top of the script, and report it in the log;
* without this the measured run time is never shown.
timer off 1
timer list 1
cap log close
